Overview

This analysis performs differential abundance analysis on lipid profiles from maize leaf samples across multiple experimental factors:

  • Leaf tissue position: Continuous gradient from apical to basal (leaf 1-4)
  • Phosphorus treatment: High P (+P) vs Low P (-P)
  • Genotype: Control (CTRL) vs Inversion 4m (INV4M)

The analysis uses limma-voom to identify lipids that respond to:

  1. Leaf: Leaf tissue position effect
  2. -P: Phosphorus deficiency effect
  3. Inv4m: Genotype effect
  4. -P:Inv4m: Interaction between P treatment and genotype

Fold change thresholds:

  • Leaf effect: ±0.7 log2FC
  • Other effects: ±2 log2FC

Libraries

library(edgeR)
library(ggplot2)
library(limma)
library(dplyr)
library(tidyr)
library(forcats)
library(ggrepel)
library(cowplot)
library(ggpubr)
library(ggtext)

Data Loading and Preprocessing

Define Internal Standards to Exclude

# Internal standards and odd-chain lipids to exclude
internal_standards <- c( 
  "CUDA",
  "LPE_17_1",
  "Sphingosine_17_1",
  "PC_25_0",
  "DG_18_1",
  "PG_34_0",
  "DG_12_0",
  "TG_17_0",
  "LPC_17_0",
  "PE_34",
  "Cholesterol"
)

# exclude the odd carbon lipid
odd_carbon <- c( "PG_17_0", "SM_35_1","TG_57_6")

to_exclude <- c(internal_standards,odd_carbon)

Load Raw Lipid Data

# Load MS-Dial raw data with new internal standard integration
lipid_csv <- file.path(paths$data, "PSU_RawData_MSDial_NewStdInt_240422.csv")

raw <- read.csv(lipid_csv, na.strings = c("", "#N/A", "NA", "Inf"))

# Remove internal standards and quality control samples
to_exclude <- to_exclude[to_exclude %in% colnames(raw)]

raw <- raw %>%
  dplyr::select(-all_of(to_exclude)) %>%
  filter(!grepl("Methanol", sampleID)) %>%
  filter(!grepl("Pool", sampleID))

cat("Loaded", nrow(raw), "samples\n")
## Loaded 43 samples

Load Sample Metadata

# Load plant phenotypic data
plant_csv <- file.path(paths$data, "22_NCS_PSU_LANGEBIO_FIELDS_PSU_P_field.csv")

psu <- read.csv(plant_csv) %>%
  dplyr::rename(Genotype = "Who.What", rowid = "P22.") %>%
  filter(Genotype %in% c("CTRL", "INV4M")) %>%
  droplevels()


# Load RNA-seq metadata
sampleInfo <- read.csv(file.path(paths$data, 'inv4mRNAseq_metadata.csv')) %>%
  dplyr::rename(Genotype = genotype) %>%
  dplyr::rename(rowid = row)

# Merge metadata
sampleInfo <- psu %>%
  dplyr::select(rowid, Rep, Plot_Row, Plot_Column, DTA, DTS) %>%
  inner_join(sampleInfo) %>%
  filter(!is.na(DTA))

# Create leaf position groups
sampleInfo$leaf_group <- NA
sampleInfo$leaf_group[sampleInfo$leaf_tissue < 3] <- "apical"
sampleInfo$leaf_group[sampleInfo$leaf_tissue > 2] <- "basal"

# Standardize sample IDs
sampleInfo$tube <- gsub("L|R", "S", sampleInfo$tube, perl = TRUE)
sampleInfo$rowid <- sampleInfo$row

# mass spectrometry injection order
ms_order<- read.csv(file.path(paths$data, "PSU-PHO22_ms_order.csv"))
ms_order$tube <- gsub("L|R","S",ms_order$top_tag, perl =TRUE)
 sampleInfo$row
##  [1] 3056 3056 3056 3056 3059 3059 3059 3059 3080 3080 3080 3080 3083 3083 3083
## [16] 3083 3092 3092 3092 3092 3095 3095 3095 3095 3113 3113 3113 3113 3131 3131
## [31] 3131 3131 4080 4080 4080 4080 4083 4083 4083 4083 4104 4104 4104 4104 4107
## [46] 4107 4107 4107 4113 4113 4113 4113 4122 4122 4122 4122 4125 4125 4125 4125
## [61] 4131 4131 4131 4131
sampleInfo <- sampleInfo%>%
  dplyr::right_join(ms_order) %>% distinct()

cat("Sample info loaded for", nrow(sampleInfo), "samples\n")
## Sample info loaded for 53 samples

Merge Lipid Data with Metadata

# Extract tube ID from raw data
raw$tube <- gsub(".*_|.raw", "", raw$sampleID, perl = TRUE)
raw$tube <- gsub("L|R", "S", raw$tube, perl = TRUE)

# Merge phenotypic data with lipid measurements
pheno <- sampleInfo %>%
  dplyr::select(rowid, tube, Rep, Plot:Plot_Row, Treatment:leaf_tissue, leaf_group) %>%
  inner_join(
    raw %>%
      dplyr::select(tube, DGDG_34_0:TG_58_5)
  ) %>%
  mutate(block = as.factor(Rep)) %>%
  droplevels() %>%
  pivot_wider(
    names_from = Rep,
    values_from = Rep,
    names_prefix = "block_",
    values_fn = function(x) 1,
    values_fill = 0
  ) %>%
  dplyr::select(rowid, tube:Plot_Row, block, block_6:block_12, everything()) %>%
  mutate(Treatment = factor(Treatment, levels = c("High_P", "Low_P")))




# Simplify treatment labels
levels(pheno$Treatment) <- c("+P", "-P")

# Remove problematic sample
pheno <- pheno[-29, ]  # mislabelled for lipids?

cat("Final dataset:", nrow(pheno), "samples\n")
## Final dataset: 42 samples

Define Lipid Classifications

# Extract lipid measurements matrix
m <- pheno %>%
  dplyr::select(DGDG_34_0:TG_58_5) %>%
  as.matrix()

# Define lipid head groups
head_group <- c(
  DG = "neutral",
  DGDG = "glycolipid",
  DGGA = "glycolipid",
  LPC = "phospholipid",
  LPE = "phospholipid",
  MGDG = "glycolipid",
  PC = "phospholipid",
  PE = "phospholipid",
  PG = "phospholipid",
  PI = "phospholipid",
  SQDG = "glycolipid",
  TG = "neutral"
)

# Create lipid species annotation
sp <- data.frame(
  colname = colnames(m),
  class = gsub("_.*", "", colnames(m), perl = TRUE)
)

sp$head_group <- head_group[sp$class]

# Summary of lipid classes
cat("\nLipid class distribution:\n")
## 
## Lipid class distribution:
print(with(sp, table(head_group, class)))
##               class
## head_group     DG DGDG DGGA LPC LPE MGDG PC PE PG PI SQDG TG
##   glycolipid    0   12    9   0   0    8  0  0  0  0    9  0
##   neutral      22    0    0   0   0    0  0  0  0  0    0 25
##   phospholipid  0    0    0   6   3    0 19 12  6  4    0  0

Quality Control

Create DGEList Object

Critical step: The counts matrix comes from pheno, but we need to ensure the sample metadata in the DGEList matches the processed pheno data, especially for Treatment levels which were renamed from “High_P”/“Low_P” to “+P”/“-P”.

# Prepare count matrix
counts <- pheno[, colnames(m)] %>%
  as.matrix() %>%
  t()

# Visualize data distribution
hist(counts, main = "Distribution of Lipid Abundances", xlab = "Abundance")

# Set column names to match sample IDs
colnames(counts) <- pheno$tube

cat("\nCount matrix dimensions:", dim(counts), "\n")
## 
## Count matrix dimensions: 135 42
cat("Lipids:", nrow(counts), "\n")
## Lipids: 135
cat("Samples:", ncol(counts), "\n")
## Samples: 42

Match Sample Metadata to Counts

# Extract sample names from counts matrix
sampleNames <- pheno$tube

# Verify all sample names are present
cat("\nAll samples in counts?", all(sampleNames %in% sampleInfo$tube), "\n")
## 
## All samples in counts? TRUE
# Match sampleInfo to the order of samples in counts matrix
sampleInfo_matched <- sampleInfo[match(sampleNames, sampleInfo$tube), ]

# CRIionAL: Update Treatment levels to match pheno
# The pheno object has Treatment renamed to "+P"/"-P"
sampleInfo_matched$Treatment <- factor(
  pheno$Treatment[match(sampleInfo_matched$tube, pheno$tube)],
  levels = c("+P", "-P")
)

cat("\nTreatment levels after update:\n")
## 
## Treatment levels after update:
print(levels(sampleInfo_matched$Treatment))
## [1] "+P" "-P"
print(table(sampleInfo_matched$Treatment))
## 
## +P -P 
## 15 27
# Create corrected matrix for plotting

sampleInfo$leaf_tissue_c <- scale(sampleInfo$leaf_tissue, center = TRUE, scale = FALSE)

filtered_sample_idx <- which(colnames(counts) %in% sampleInfo$tube)

filtered_covariates <- sampleInfo[
  filtered_sample_idx,
c("Plot_Row","injection_order")]

filtered_metadata <- sampleInfo[filtered_sample_idx,]
# 
# corrected_matrix <- removeBatchEffect(
#   counts,
#   covariates = filtered_covariates,
#   design = model.matrix(
#     ~ leaf_tissue_c * Treatment + Genotype * Treatment + 
#       leaf_tissue_c * Genotype,
#     data = filtered_metadata
#   )
# )
# 
# corrected_matrix <-  corrected_matrix - min( corrected_matrix)


# Use corrected_matrix for:
# - PCA plots
# - Heatmaps  
# - Boxplots of individual metabolites
# - Any visualization where drift would obscure biology

# But use original metabolite_matrix with full design for statistics
# design_full <- model.matrix(
#   ~ Plot_Row + injection_order + 
#     leaf_tissue_c * Treatment + Genotype * Treatment + 
#     leaf_tissue_c * Genotype,
#   data = metadata
# )
# 
# fit <- lmFit(metabolite_matrix, design_full)
# fit <- eBayes(fit)

Create DGEList with Matched Metadata

# Create gene annotation
genes <- data.frame(gene = rownames(counts))

# Create DGEList with matched sample information
y <- DGEList(counts = counts, samples = sampleInfo_matched )

# Create sample groups from Treatment and Genotype
y$group <- interaction(y$samples$Treatment, y$samples$Genotype)

cat("\nDGEList created successfully\n")
## 
## DGEList created successfully
cat("Groups:", levels(y$group), "\n")
## Groups: +P.CTRL -P.CTRL +P.INV4 -P.INV4
cat("\nLibrary size summary:\n")
## 
## Library size summary:
print(summary(y$samples$lib.size))
##      Min.   1st Qu.    Median      Mean   3rd Qu.      Max. 
## 1.231e+10 4.175e+10 5.655e+10 5.876e+10 7.080e+10 1.110e+11
cat("\nLow count samples:\n")
## 
## Low count samples:
print(table(y$samples$lowCount))
## < table of extent 0 >
# Keep lipids with sufficient abundance
# Calculate Total Ion Current fractions ~ molar fractions

ion_fraction <- sweep(y$counts, 2, colSums(y$counts), FUN = "/")
hist(log10(ion_fraction))

keep <- (rowMeans(ion_fraction==0)) < 0.20
sum(!keep)
## [1] 3
y_filtered <- y[keep, ]

{
  cat("\nabundance filtering:\n")
  cat("  Genes before:", nrow(y), "\n")
  cat("  Genes after:", nrow(y_filtered), "\n")
  cat("  Genes removed:", sum(!keep), "\n")
}
## 
## abundance filtering:
##   Genes before: 135 
##   Genes after: 132 
##   Genes removed: 3

Prepare Data Frame for ggplot MDS Visualizations

CRIionAL: We need to use the ORIGINAL y_filtered object for the metadata to ensure all samples are included in the MDS visualization, but use the coordinates from the MDS computed on all samples.

y_filtered$samples$lowCount <- y_filtered$samples$lib.size < 3e10

# Remove low quality samples
y_filtered_bySample <- y_filtered[, !y_filtered$samples$lowCount]

{
  cat("\nLow quality libraries:\n")
  table(y_filtered$samples$lowCount)
  cat("\nSamples after filtering:\n")
  cat("  Retained:", ncol(y_filtered_bySample), "\n")
  cat("  Removed:", sum(y_filtered$samples$lowCount), "\n")
}
## 
## Low quality libraries:
## 
## Samples after filtering:
##   Retained: 38 
##   Removed: 4
# Extract sample metadata from filtered object
d <- y_filtered_bySample$samples

# Ensure factors are properly set
d$Treatment <- factor(d$Treatment, levels = c("+P", "-P"))
d$Rep <- factor(d$Rep)

Lipid Class distribution

# Lipid Class Composition Analysis
# =================================

## Calculate class-level abundances from normalized data

# Extract normalized counts (ion fraction scaled)
norm_counts <- ion_fraction * 1e9

# Create metadata for normalized samples
norm_metadata <- d %>%
  dplyr::filter(tube %in% colnames(norm_counts))

# Transpose normalized counts and merge with lipid annotations
lipid_abundance <- norm_counts %>%
  t() %>%
  as.data.frame() %>%
  dplyr::mutate(tube = rownames(.)) %>%
  tidyr::pivot_longer(
    cols = -tube,
    names_to = "response",
    values_to = "abundance"
  ) %>%
  dplyr::inner_join(sp, by = c("response" = "colname")) %>%
  dplyr::inner_join(norm_metadata, by = "tube")

## Create color palette with variations within each head group

# Define base colors for each head group
base_colors <- c(
  glycolipid = "darkblue",
  phospholipid = "darkred",
  neutral = "darkorange"
)

# Function to generate color variations (lighter/darker shades)
generate_color_variations <- function(base_color, n) {
  if (n == 1) return(base_color)
  
  # Use colorRampPalette to create gradient from lighter to darker
  colorRampPalette(c(
    colorspace::lighten(base_color, 0.4),
    base_color,
    colorspace::darken(base_color, 0.3)
  ))(n)
}

# Get classes by head group from sp annotation
classes_by_headgroup <- sp %>%
  dplyr::select(class, head_group) %>%
  dplyr::distinct() %>%
  dplyr::arrange(head_group, class)

# Order classes by head group for consistent display
class_order <- classes_by_headgroup %>%
  dplyr::pull(class)

# Generate colors for each head group
class_colors <- c()
for (hg in names(base_colors)) {
  # Get classes in this head group
  hg_classes <- classes_by_headgroup %>%
    dplyr::filter(head_group == hg) %>%
    dplyr::pull(class)
  
  # Generate color variations
  if (length(hg_classes) > 0) {
    hg_colors <- generate_color_variations(base_colors[hg], length(hg_classes))
    names(hg_colors) <- hg_classes
    class_colors <- c(class_colors, hg_colors)
  }
}

# Reorder to match class_order
class_colors <- class_colors[class_order]

cat("\nGenerated", length(class_colors), "color variations\n")
## 
## Generated 12 color variations
cat("Colors by head group:\n")
## Colors by head group:
print(table(classes_by_headgroup$head_group))
## 
##   glycolipid      neutral phospholipid 
##            4            2            6
# Define custom green to orange palette for leaf positions
green_to_orange <- c(
  "#00954A",  # Spanish Green (Leaf 1)
  "#A4DF56",  # Inchworm (Leaf 2)
  "#D7E23C",  # Pear (Leaf 3)
  "#FF9A1F"   # Crayola's Bright Yellow/Orange (Leaf 4)
)

## Plot 1: Mean composition by Leaf position - grouped bars by leaf within each class

leaf_composition <- lipid_abundance %>%
  dplyr::group_by(tube, Treatment, leaf_tissue, class) %>%
  dplyr::summarise(class_sum = sum(abundance), .groups = "drop") %>%
  dplyr::group_by(tube, Treatment, leaf_tissue) %>%
  dplyr::mutate(class_pct = 100 * class_sum / sum(class_sum)) %>%
  dplyr::ungroup() %>%
  dplyr::group_by(Treatment, leaf_tissue, class) %>%
  dplyr::summarise(
    mean_pct = mean(class_pct),
    se_pct = sd(class_pct) / sqrt(n()),
    .groups = "drop"
  )

leaf_composition$class <- factor(leaf_composition$class, levels = class_order)

# Create leaf position factor
leaf_composition <- leaf_composition %>%
  dplyr::mutate(
    leaf_position = factor(
      leaf_tissue,
      levels = c(1, 2, 3, 4),
      labels = c("1", "2", "3", "4")
    )
  )

# Add head group for faceting
leaf_composition <- leaf_composition %>%
  dplyr::inner_join(
    sp %>% dplyr::select(class, head_group) %>% dplyr::distinct(),
    by = "class"
  )

# Plot with lipid classes on x-axis, grouped by leaf position, log Y-scale
plot1_composition <- leaf_composition %>%
  ggplot(aes(x = class, y = mean_pct, fill = leaf_position)) +
  geom_col(color = "black", linewidth = 0.2, position = position_dodge(width = 0.9)) +
  geom_errorbar(
    aes(ymin = mean_pct - se_pct, ymax = mean_pct + se_pct),
    position = position_dodge(width = 0.9),
    width = 0.25,
    linewidth = 0.5
  ) +
  scale_fill_manual(
    name = "Leaf",
    values = green_to_orange
  ) +
  scale_y_log10(
    breaks = c(0.05, 0.1, 0.2, 0.5, 1, 2, 5, 10, 20, 40),
    labels = c("0.05", "0.1", "0.2", "0.5", "1", "2", "5", "10", "20", "40")
  ) +
  labs(
    y = "Ion %"
  ) +
  facet_grid(rows = vars(Treatment), cols = vars(head_group), 
             scales = "free_x", space = "free_x") +
  theme_classic2(base_size = 14) +
  theme(
    axis.title.x = element_blank(),
    axis.text.x = element_text(angle = 0, hjust = 0.5, size = 13, face = "bold"),
    strip.background = element_blank(),
    strip.text.x = element_text(angle = 0, size = 25, face = "bold"),
    strip.text.y = element_text(angle = 0, size = 25, face = "bold"),
    legend.position = "none"
  )

## Plot 2: Treatment effect (-P vs +P) by leaf stage

# Calculate mean and SE per Treatment-Leaf-Class
class_abundance_summary <- lipid_abundance %>%
  dplyr::group_by(tube, Treatment, leaf_tissue, class) %>%
  dplyr::summarise(class_sum = sum(abundance), .groups = "drop") %>%
  dplyr::group_by(Treatment, leaf_tissue, class) %>%
  dplyr::summarise(
    mean_abundance = mean(class_sum),
    se_abundance = sd(class_sum) / sqrt(n()),
    n_samples = n(),
    .groups = "drop"
  )

# Pivot to get +P and -P side by side
treatment_comparison <- class_abundance_summary %>%
  tidyr::pivot_wider(
    names_from = Treatment,
    values_from = c(mean_abundance, se_abundance, n_samples)
  ) %>%
  dplyr::mutate(
    # Calculate log2 fold change: log2(-P / +P)
    log2fc = log2(`mean_abundance_-P` / `mean_abundance_+P`),
    
    # Propagate error using delta method
    # For log2(Y/X): SE ≈ 1/ln(2) × sqrt((SE_Y/Y)^2 + (SE_X/X)^2)
    se_log2fc = (1/log(2)) * sqrt(
      (`se_abundance_-P` / `mean_abundance_-P`)^2 + 
      (`se_abundance_+P` / `mean_abundance_+P`)^2
    )
  ) %>%
  dplyr::mutate(
    leaf_position = factor(
      leaf_tissue,
      levels = c(1, 2, 3, 4),
      labels = c("1", "2", "3", "4")
    )
  )

treatment_comparison$class <- factor(
  treatment_comparison$class, 
  levels = class_order
)

# Add head group
treatment_comparison <- treatment_comparison %>%
  dplyr::inner_join(
    sp %>% dplyr::select(class, head_group) %>% dplyr::distinct(),
    by = "class"
  )

# Plot treatment effect
plot2_treatment_effect <- treatment_comparison %>%
  ggplot(aes(x = class, y = log2fc, fill = leaf_position)) +
  geom_col(color = "black", linewidth = 0.2, position = position_dodge(width = 0.9)) +
  geom_errorbar(
    aes(ymin = log2fc - se_log2fc, ymax = log2fc + se_log2fc),
    position = position_dodge(width = 0.9),
    width = 0.25,
    linewidth = 0.5
  ) +
  scale_fill_manual(
    name = "Leaf",
    values = green_to_orange
  ) +
  labs(
    y = expression(log[2]~"FC (-P / +P)")
  ) +
  facet_grid(cols = vars(head_group), scales = "free_x", space = "free_x") +
  theme_classic2(base_size = 14) +
  theme(
    axis.title.x = element_blank(),
    axis.text.x = element_text(angle = 0, hjust = 0.5, size = 13, face = "bold"),
    strip.background = element_blank(),
    strip.text.x = element_blank(),
  )


## Plot 3: Relative change with paired fold changes per plant

# Get sample-level abundances with plant ID
class_abundance_samples <- lipid_abundance %>%
  dplyr::group_by(tube, NCSU_RNA_plant, Treatment, leaf_tissue, class) %>%
  dplyr::summarise(class_sum = sum(abundance), .groups = "drop")

# Pivot wide to get all leaves per plant
class_abundance_wide <- class_abundance_samples %>%
  dplyr::select(-tube) %>%
  tidyr::pivot_wider(
    names_from = leaf_tissue,
    values_from = class_sum,
    names_prefix = "leaf_"
  )

# Fill missing values with treatment-class mean for that leaf
class_abundance_filled <- class_abundance_wide %>%
  dplyr::group_by(Treatment, class) %>%
  dplyr::mutate(
    leaf_1 = ifelse(is.na(leaf_1), mean(leaf_1, na.rm = TRUE), leaf_1),
    leaf_2 = ifelse(is.na(leaf_2), mean(leaf_2, na.rm = TRUE), leaf_2),
    leaf_3 = ifelse(is.na(leaf_3), mean(leaf_3, na.rm = TRUE), leaf_3),
    leaf_4 = ifelse(is.na(leaf_4), mean(leaf_4, na.rm = TRUE), leaf_4)
  ) %>%
  dplyr::ungroup()

# Calculate log2 fold change per plant (keeping leaf 1 = 0 for symmetry)
class_relative_per_plant <- class_abundance_filled %>%
  dplyr::mutate(
    leaf1_log2fc = 0,
    leaf2_log2fc = log2(leaf_2 / leaf_1),
    leaf3_log2fc = log2(leaf_3 / leaf_1),
    leaf4_log2fc = log2(leaf_4 / leaf_1)
  ) %>%
  dplyr::select(NCSU_RNA_plant, Treatment, class, 
                leaf1_log2fc, leaf2_log2fc, leaf3_log2fc, leaf4_log2fc) %>%
  tidyr::pivot_longer(
    cols = starts_with("leaf"),
    names_to = "leaf_position",
    values_to = "log2fc"
  )

# Calculate mean and SE
class_leaf_ratio_treatment <- class_relative_per_plant %>%
  dplyr::group_by(Treatment, leaf_position, class) %>%
  dplyr::summarise(
    mean_log2fc = mean(log2fc, na.rm = TRUE),
    se_log2fc = sd(log2fc, na.rm = TRUE) / sqrt(n()),
    .groups = "drop"
  ) %>%
  dplyr::mutate(
    leaf_position = factor(
      leaf_position,
      levels = c("leaf1_log2fc", "leaf2_log2fc", "leaf3_log2fc", "leaf4_log2fc"),
      labels = c("1", "2", "3", "4")
    )
  )

class_leaf_ratio_treatment$class <- factor(
  class_leaf_ratio_treatment$class, 
  levels = class_order
)

# Add head group
class_leaf_ratio_treatment <- class_leaf_ratio_treatment %>%
  dplyr::inner_join(
    sp %>% dplyr::select(class, head_group) %>% dplyr::distinct(),
    by = "class"
  )

# Plot with log2FC
plot3_leaf_relative <- class_leaf_ratio_treatment %>%
  ggplot(aes(x = class, y = mean_log2fc, fill = leaf_position)) +
  geom_col(color = "black", linewidth = 0.2, position = position_dodge(width = 0.9)) +
  geom_errorbar(
    aes(ymin = mean_log2fc - se_log2fc, ymax = mean_log2fc + se_log2fc),
    position = position_dodge(width = 0.9),
    width = 0.25,
    linewidth = 0.5
  ) +
  scale_fill_manual(
    name = "Leaf",
    values = green_to_orange
  ) +
  labs(
    y = expression(log[2]~"FC vs Leaf 1")
  ) +
  facet_grid(rows = vars(Treatment), cols = vars(head_group), 
             scales = "free_x", space = "free_x") +
  theme_classic2(base_size = 14) +
  theme(
    axis.title.x = element_blank(),
    axis.text.x = element_text(angle = 0, hjust = 0.5, size = 13, face = "bold"),
    strip.background = element_blank(),
    strip.text.x = element_blank(),
    strip.text.y = element_text(angle = 0, size = 25, face = "bold"),
    legend.position = "none"
  )


# Combine all three plots
final_combined <- plot_grid(
  plot1_composition,
  plot2_treatment_effect,
  plot3_leaf_relative,
  ncol = 1,
  labels = c("A", "B", "C"),
  label_size = 25,
  rel_heights = c(1, 0.5, 1),
  align = "v",
  axis = "lr"
)

# Save combined plot
ggsave(
  file.path(paths$figures, "lipid_composition_combined_final.svg"),
  final_combined,
  width = 10,
  height = 10,
  dpi = 150
)
cat("\nFinal combined plot created and saved\n")
## 
## Final combined plot created and saved
print(final_combined)

Initial MDS

# MDS with all samples (before library size filtering)
mds_all <- plotMDS(y_filtered, pch = 21, labels=y$samples$tube,plot = TRUE)

# Histogram of library sizes
hist(
  y$samples$lib.size / 1e6,
  main = "Library Size Distribution",
  xlab = "Library Size (millions of reads)",
  breaks = 20
)

{
cat("\nLibrary size summary:\n")
print(summary(y_filtered$samples$lib.size))
cat("\nSamples with lib.size < 20 million:", 
    sum(y_filtered$samples$lib.size < 2e7), "\n")
}
## 
## Library size summary:
##      Min.   1st Qu.    Median      Mean   3rd Qu.      Max. 
## 1.231e+10 4.175e+10 5.655e+10 5.876e+10 7.080e+10 1.110e+11 
## 
## Samples with lib.size < 20 million: 0

MDS Colored by MS Injection Order

mds_qc_data <- y_filtered$samples %>%
  mutate(
    dim1 = mds_all$x,
    dim2 = mds_all$y,
    lib_size = lib.size / 1e10
  )

# Plot MDS colored by MS injection order

ggplot( mds_qc_data, aes(x = dim1, y = dim2, color =injection_order )) +
  geom_point(size = 3) +
  scale_color_viridis_c(option = "plasma") +
  labs(
    x = paste0("Dim1 (", round(100 * mds_all$var.explained[1]), "%)"),
    y = paste0("Dim2 (", round(100 * mds_all$var.explained[2]), "%)"),
    color = "Injection Order"
  ) +
  theme_classic2(base_size = 15)

Compute MDS Dimensions

MDS reveals sources of variation in gene abundance across samples. Dimensions 3 and 4 are extracted from eigenvectors scaled by square root of variance explained.

mds <- plotMDS(
  y,
  pch = 21,
  label = y$samples$side_tag,
  plot = FALSE
)

# Store MDS coordinates in sample data
d <- y_filtered$samples
d$dim1 <- mds$x
d$dim2 <- mds$y
d$dim3 <- mds$eigen.vectors[, 3] * sqrt(mds$var.explained[3])
d$dim4 <- mds$eigen.vectors[, 4] * sqrt(mds$var.explained[4])

# Prepare factors for plotting
d$Treatment <- factor(d$Treatment)
d$Genotype <- factor(d$Genotype)

# Variance explained by each dimension
tibble(
  dimension = paste("Dim", 1:4),
  var_explained = round(mds$var.explained[1:4], 4)
)
## # A tibble: 4 × 2
##   dimension var_explained
##   <chr>             <dbl>
## 1 Dim 1            0.377 
## 2 Dim 2            0.14  
## 3 Dim 3            0.101 
## 4 Dim 4            0.0784

MDS: Dimensions 1 and 2 Colored by Multiple Factors

Exploring which experimental factors drive the main dimensions of variation.

# Treatment
p1 <- ggplot(d, aes(x = dim2, y = dim3, color = Treatment)) +
  geom_point(size = 3) +
  theme_classic2(base_size = 15) +
  ggtitle("Treatment")

# Row position
p2 <- ggplot(d, aes(x = dim2, y = dim3, color = injection_order, shape = Treatment)) +
  geom_point(size = 3) +
  theme_classic2(base_size = 15) +
  ggtitle("Injection order")

# Collection time
p3 <- ggplot(d, aes(x = dim2, y = dim3, color = decimal_time)) +
  geom_point(size = 3) +
  theme_classic2(base_size = 15) +
  ggtitle("Collection Time")

# Collector
p4 <- ggplot(d, aes(x = dim2, y = dim3, color = COLLECTOR)) +
  geom_point(size = 3) +
  theme_classic2(base_size = 15) +
  ggtitle("Collector")

# Genotype
p5 <- ggplot(d, aes(x = dim2, y = dim3, color = Genotype)) +
  geom_point(size = 3) +
  theme_classic2(base_size = 15) +
  ggtitle("Genotype")

# Leaf tissue
p6 <- d %>%
  mutate(leaf = factor(leaf_tissue)) %>%
  ggplot(aes(x = dim2, y = dim3, color = leaf)) +
  geom_point(size = 3) +
  theme_classic2(base_size = 15) +
  ggtitle("Leaf Tissue")

ggarrange(p1, p2, p3, p4, p5, p6, ncol = 3, nrow = 2)

MDS: Primary Plot (Leaf Tissue and Treatment)

lipid_MDS_23 <- d %>%
  mutate(leaf = factor(leaf_tissue)) %>%
  ggplot(aes(x = dim2, y = dim3)) +
  ggtitle("Lipid MDS") +
  xlab(paste0("dim2 (", round(100 * mds$var.explained[2]), "%)")) +
  ylab(paste0("dim3 (", round(100 * mds$var.explained[3]), "%)")) +
  geom_point(aes(fill = leaf, shape = Treatment), size = 4) +
  scale_fill_manual(values= green_to_orange) +
  scale_shape_manual(values = c(21, 25)) +
  guides(
    shape = guide_legend( title = element_blank(), 
                          override.aes = list(fill= "black")),
    fill = guide_legend(
      title = "Leaf",
      order = 2,
      override.aes = list(geom = "point", shape = 22, size = 7, reverse = TRUE)
    )
  ) +
  theme_classic2(base_size = 30) +
  theme(
    axis.text.x = element_blank(),  
    axis.ticks.x = element_blank(), # Remove x-axis ticks
    axis.text.y = element_blank(),  # Remove y-axis labels
    axis.ticks.y = element_blank(),  # Remove y-axis ticks
    legend.spacing = unit(0, "line"),
    legend.box.spacing = unit(0, "in"),
    legend.background = element_rect(fill = "transparent")
    #legend.position = c(0.2, 0.17)
  )

# Save growth curve plots
saveRDS(lipid_MDS_23 , file.path(paths$intermediate, "lipid_MDS_23.rds"))
cat("MDS plots saved to output/\n")
## MDS plots saved to output/
lipid_MDS_23

MDS: Dimensions 3 and 4

The third dimension separates by genotype.

# Set up genotype labels with italic formatting
labels <- c("CTRL", "*Inv4m*")
names(labels) <- c("CTRL", "INV4")

d %>%
  mutate(leaf = factor(leaf_tissue)) %>%
  ggplot(aes(x = dim3, y = dim4, fill = leaf, shape = Treatment)) +
  xlab(paste0("dim3 (", round(100 * mds$var.explained[3]), "%)")) +
  ylab(paste0("dim4 (", round(100 * mds$var.explained[4]), "%)")) +
  geom_point(size = 4) +
  scale_fill_manual(values= green_to_orange) +
  scale_shape_manual(values = c(21, 25)) +
  guides(
    shape = guide_legend( title = element_blank(), 
                          override.aes = list(fill= "black")),
    fill = guide_legend(
      title = "Leaf",
      order = 2,
      override.aes = list(geom = "point", shape = 22, size = 7, reverse = TRUE)
    )
  ) +
  theme_classic2(base_size = 25) +
  theme(
    legend.box = "horizontal",
    legend.position = c(0.9, 0.8),
    legend.text = element_markdown(),
    legend.spacing = unit(0, "line"),
    legend.box.spacing = unit(0, "line")
  )

MDS Dimension Correlations

# Calculate correlations and p-values
mds_cor_results <- tibble(
  dimension = c("Dim2", "Dim3", "Dim4"),
  factor = c("Treatment", "Leaf tissue","Leaf tissue"),
  correlation = c(
    cor(d$dim2, as.numeric(d$Treatment)),
    cor(d$dim3, as.numeric(d$leaf_tissue)),
    cor(d$dim4, as.numeric(d$leaf_tissue))
  ),
  p_value = c(
    cor.test(d$dim2, as.numeric(d$Treatment))$p.value,
    cor.test(d$dim3, d$leaf_tissue)$p.value,
     cor.test(d$dim4, d$leaf_tissue)$p.value
  )
) %>%
  mutate(
    adj_p_value = p.adjust(p_value, method = "fdr"),
    correlation = round(correlation, 3),
    p_value = signif(p_value, 3),
    adj_p_value = signif(adj_p_value, 3)
  )

mds_cor_results
## # A tibble: 3 × 5
##   dimension factor      correlation  p_value adj_p_value
##   <chr>     <chr>             <dbl>    <dbl>       <dbl>
## 1 Dim2      Treatment         0.427 0.00478     0.00478 
## 2 Dim3      Leaf tissue      -0.556 0.000132    0.000396
## 3 Dim4      Leaf tissue       0.517 0.000461    0.000692

Differential abundance Analysis

Final Data Preparation for Model

CRIionAL: Before creating the design matrix, ensure Treatment levels are correctly set to match the original factor levels for proper interpretation.

# --- Verify experimental design variables ---

# Check that Plot_Column is present in the metadata
cat("\nPlot_Column present?", "Plot_Column" %in% colnames(d), "\n")
## 
## Plot_Column present? TRUE
cat("Plot_Column values:\n")
## Plot_Column values:
print(table(d$Plot_Column))
## 
##  3  4  5  6 
##  8 13  6 15
# Check that Plot_Row is present in the metadata
cat("\nPlot_Row present?", "Plot_Row" %in% colnames(d), "\n")
## 
## Plot_Row present? TRUE
cat("Plot_Row values:\n")
## Plot_Row values:
print(table(d$Plot_Row))
## 
##  1  2  3  4  5  6  7  8 
##  3  4  4  4 10  9  4  4
# --- Prepare treatment variable for model fitting ---

# Re-establish the factor levels for Treatment so that "+P" (control) is the baseline.
# This ensures the model interprets Treatment effects as "-P" relative to "+P".
d$Treatment <- factor(d$Treatment, levels = c("+P", "-P"))

# --- Center the continuous leaf age variable ---

# Convert leaf_tissue (leaf age or developmental stage) to a centered numeric covariate.
# Centering subtracts the mean leaf age so that:
#   - the intercept represents abundance at the *average leaf age*,
#   - the Treatment coefficient corresponds to the treatment effect at mean leaf age,
#   - and collinearity between leaf age and Treatment is minimized.


# --- Verify treatment structure after recoding ---
cat("\nTreatment levels for model:\n")
## 
## Treatment levels for model:
print(levels(d$Treatment))
## [1] "+P" "-P"
print(table(d$Treatment))
## 
## +P -P 
## 15 27

Design Matrix and Normalization

# Extract sample names from counts matrix
sampleNames <- y_filtered_bySample$samples$tube

# Verify all sample names are present
cat("\nAll samples in counts?", all(sampleNames %in% sampleInfo$tube), "\n")
## 
## All samples in counts? TRUE
# Match sampleInfo to the order of samples in counts matrix
sampleInfo_matched <- sampleInfo[match(sampleNames, sampleInfo$tube), ]
sampleInfo_matched
##    rowid Rep Plot_Row Plot_Column DTA DTS      library tube rna_ctn rna_batch1
## 1   3056   6        3           3  77  78  L01_S1_L002  S01      49          1
## 2   3056   6        3           3  77  78  L02_S2_L002  S02     101          1
## 3   3056   6        3           3  77  78  L03_S3_L002  S03      41          1
## 6   3059   6        4           3  76  77  L06_S6_L002  S06      63          1
## 7   3059   6        4           3  76  77  L07_S7_L002  S07      71          1
## 8   3059   6        4           3  76  77  L08_S8_L002  S08      86          1
## 9   3080   7        6           4  76  78  L09_S9_L002  S09     269          1
## 11  3080   7        6           4  76  78 L11_S11_L002  S11      70          1
## 12  3080   7        6           4  76  78 L12_S12_L002  S12     179          1
## 13  3083   7        5           4  78  78 L13_S13_L002  S13      42          1
## 14  3083   7        5           4  78  78 L15_S15_L002  S15      41          1
## 16  3092   5        2           4  75  76 L17_S17_L002  S17     137          2
## 17  3092   5        2           4  75  76 L18_S18_L002  S18      61          2
## 18  3092   5        2           4  75  76 L19_S19_L002  S19      56          2
## 19  3092   5        2           4  75  76 L20_S20_L002  S20      38          2
## 20  3095   5        1           4  74  74 L21_S21_L002  S21     148          2
## 21  3095   5        1           4  74  74 L23_S23_L002  S23      53          2
## 22  3095   5        1           4  74  74 L24_S24_L002  S24     173          2
## 23  3113  11        6           5  80  81 L25_S25_L002  S25      88          2
## 24  3113  11        6           5  80  81 L26_S26_L002  S26      60          2
## 25  3113  11        6           5  80  81 L27_S27_L002  S27      56          2
## 26  3131  11        5           6  75  77 L29_S29_L002  S29     138          2
## 27  3131  11        5           6  75  77 L31_S31_L002  S31      42          2
## 28  3131  11        5           6  75  77 L32_S32_L002  S32      94          2
## 29  4113  11        6           5  73  74 L49_S45_L002  S49      96          4
## 31  4113  11        6           5  73  74 L51_S47_L002  S51     150          4
## 32  4113  11        6           5  73  74 L52_S48_L002  S52     146          4
## 34  4122  12        8           6  71  73 L54_S50_L002  S54      85          4
## 35  4122  12        8           6  71  73 L55_S51_L002  S55     101          4
## 36  4122  12        8           6  71  73 L56_S52_L002  S56     161          4
## 37  4125  12        7           6  69  71 L57_S53_L002  S57     182          4
## 38  4125  12        7           6  69  71 L58_S54_L002  S58     163          4
## 39  4125  12        7           6  69  71 L59_S55_L002  S59      95          4
## 40  4125  12        7           6  69  71 L60_S56_L002  S60     213          4
## 41  4131  11        5           6  69  71 L61_S57_L002  S61     120          4
## 42  4131  11        5           6  69  71 L62_S58_L002  S62     103          4
## 43  4131  11        5           6  69  71 L63_S59_L002  S63     186          4
## 44  4131  11        5           6  69  71 L64_S60_L002  S64      82          4
##    tube_n side NCSU_RNA_plant Treatment Genotype leaf_tissue side_tag
## 1       1    L              1     Low_P     CTRL           1   #NAME?
## 2       2    L              1     Low_P     CTRL           2   #NAME?
## 3       3    L              1     Low_P     CTRL           3   #NAME?
## 6       6    L              2     Low_P     INV4           2   #NAME?
## 7       7    L              2     Low_P     INV4           3   #NAME?
## 8       8    L              2     Low_P     INV4           4   #NAME?
## 9       9    L              3     Low_P     INV4           1   #NAME?
## 11     11    L              3     Low_P     INV4           3   #NAME?
## 12     12    L              3     Low_P     INV4           4   #NAME?
## 13     13    L              4     Low_P     CTRL           1   #NAME?
## 14     15    L              4     Low_P     CTRL           3   #NAME?
## 16     17    L              5     Low_P     INV4           1   #NAME?
## 17     18    L              5     Low_P     INV4           2   #NAME?
## 18     19    L              5     Low_P     INV4           3   #NAME?
## 19     20    L              5     Low_P     INV4           4   #NAME?
## 20     21    L              6     Low_P     CTRL           1   #NAME?
## 21     23    L              6     Low_P     CTRL           3   #NAME?
## 22     24    L              6     Low_P     CTRL           4   #NAME?
## 23     25    L              7     Low_P     CTRL           1   #NAME?
## 24     26    L              7     Low_P     CTRL           2   #NAME?
## 25     27    L              7     Low_P     CTRL           3   #NAME?
## 26     29    L              8     Low_P     INV4           1   #NAME?
## 27     31    L              8     Low_P     INV4           3   #NAME?
## 28     32    L              8     Low_P     INV4           4   #NAME?
## 29     49    L             13    High_P     CTRL           1   #NAME?
## 31     51    L             13    High_P     CTRL           3   #NAME?
## 32     52    L             13    High_P     CTRL           4   #NAME?
## 34     54    L             14    High_P     CTRL           2   #NAME?
## 35     55    L             14    High_P     CTRL           3   #NAME?
## 36     56    L             14    High_P     CTRL           4   #NAME?
## 37     57    L             15    High_P     INV4           1   #NAME?
## 38     58    L             15    High_P     INV4           2   #NAME?
## 39     59    L             15    High_P     INV4           3   #NAME?
## 40     60    L             15    High_P     INV4           4   #NAME?
## 41     61    L             16    High_P     INV4           1   #NAME?
## 42     62    L             16    High_P     INV4           2   #NAME?
## 43     63    L             16    High_P     INV4           3   #NAME?
## 44     64    L             16    High_P     INV4           4   #NAME?
##          TIME decimal_time COLLECTOR      NOTE  S     Plot std_count_06102022
## 1  12H 37M 0S     12.61667    SERGIO            1   PlotVI                  4
## 2  12H 42M 0S     12.70000    SERGIO            2   PlotVI                  4
## 3  12H 43M 0S     12.71667    SERGIO            3   PlotVI                  4
## 6  12H 48M 0S     12.80000    SERGIO           NA   PlotVI                  4
## 7  12H 48M 0S     12.80000    SERGIO           NA   PlotVI                  4
## 8  12H 50M 0S     12.83333    SERGIO           NA   PlotVI                  4
## 9  12H 50M 0S     12.83333    SERGIO           NA   PlotVI                  6
## 11 12H 53M 0S     12.88333    SERGIO           NA   PlotVI                  6
## 12 12H 54M 0S     12.90000    SERGIO           NA   PlotVI                  6
## 13 12H 55M 0S     12.91667    SERGIO            5   PlotVI                  7
## 14 12H 57M 0S     12.95000    SERGIO ADD BEADS  7   PlotVI                  7
## 16  13H 5M 0S     13.08333  RUAIRIDH           NA   PlotVI                  7
## 17  13H 6M 0S     13.10000  RUAIRIDH           NA   PlotVI                  7
## 18  13H 7M 0S     13.11667  RUAIRIDH           NA   PlotVI                  7
## 19  13H 7M 0S     13.11667  RUAIRIDH           NA   PlotVI                  7
## 20  13H 9M 0S     13.15000  RUAIRIDH            9   PlotVI                  2
## 21 13H 10M 0S     13.16667  RUAIRIDH           11   PlotVI                  2
## 22 13H 11M 0S     13.18333  RUAIRIDH           12   PlotVI                  2
## 23 13H 12M 0S     13.20000  RUAIRIDH           13   PlotVI                  9
## 24 13H 13M 0S     13.21667  RUAIRIDH           14   PlotVI                  9
## 25 13H 16M 0S     13.26667  RUAIRIDH           15   PlotVI                  9
## 26 13H 18M 0S     13.30000  RUAIRIDH           NA   PlotVI                 10
## 27 13H 20M 0S     13.33333  RUAIRIDH           NA   PlotVI                 10
## 28 13H 21M 0S     13.35000  RUAIRIDH           NA   PlotVI                 10
## 29  13H 5M 0S     13.08333    SERGIO           25 PlotVIII                  3
## 31  13H 7M 0S     13.11667    SERGIO           27 PlotVIII                  3
## 32  13H 8M 0S     13.13333    SERGIO           28 PlotVIII                  3
## 34 13H 10M 0S     13.16667    SERGIO           30 PlotVIII                  6
## 35 13H 11M 0S     13.18333    SERGIO           31 PlotVIII                  6
## 36 13H 12M 0S     13.20000    SERGIO ADD BEADS 32 PlotVIII                  6
## 37 13H 12M 0S     13.20000    SERGIO           NA PlotVIII                  6
## 38 13H 14M 0S     13.23333    SERGIO           NA PlotVIII                  6
## 39 13H 15M 0S     13.25000    SERGIO           NA PlotVIII                  6
## 40 13H 16M 0S     13.26667    SERGIO           NA PlotVIII                  6
## 41 13H 17M 0S     13.28333    SERGIO           NA PlotVIII                  5
## 42 13H 20M 0S     13.33333    SERGIO           NA PlotVIII                  5
## 43 13H 21M 0S     13.35000    SERGIO           NA PlotVIII                  5
## 44 13H 22M 0S     13.36667    SERGIO           NA PlotVIII                  5
##    RNA_Q     borde notes leaf_number RNA_P leaf_group analysis_sampleID pool
## 1      3 variacion    NA           7    26     apical      230113_1_R01    1
## 2      3 variacion    NA           7    26     apical      230113_1_R02    2
## 3      3 variacion    NA           7    26      basal      230113_1_R03    3
## 6      5              NA           8    16     apical      230113_2_R06    5
## 7      5              NA           8    16      basal      230113_2_R07    2
## 8      5              NA           8    16      basal      230113_2_R08    2
## 9      4 variacion    NA           7    22     apical      230113_3_R09    1
## 11     4 variacion    NA           7    22      basal      230113_3_R11    2
## 12     4 variacion    NA           7    22      basal      230113_3_R12    3
## 13     5              NA           7    18     apical      230113_4_R13    3
## 14     5              NA           7    18      basal      230113_4_R15    3
## 16     4              NA           7    23     apical      230113_5_R17    1
## 17     4              NA           7    23     apical      230113_5_R18    4
## 18     4              NA           7    23      basal      230113_5_R19    2
## 19     4              NA           7    23      basal      230113_5_R20    5
## 20     2     borde    NA          NA    30     apical      230113_6_R21    3
## 21     2     borde    NA          NA    30      basal      230113_6_R23    2
## 22     2     borde    NA          NA    30      basal      230113_6_R24    4
## 23     5              NA           8    20     apical      230113_7_R25    4
## 24     5              NA           8    20     apical      230113_7_R26    3
## 25     5              NA           8    20      basal      230113_7_R27    1
## 26     4              NA           7    25     apical      230113_8_R29    4
## 27     4              NA           7    25      basal      230113_8_R31    2
## 28     4              NA           7    25      basal      230113_8_R32    3
## 29     4              NA           8    10     apical     230113_13_R49    1
## 31     4              NA           8    10      basal     230113_13_R51    4
## 32     4              NA           8    10      basal     230113_13_R52    2
## 34     4     borde    NA           8    11     apical     230113_14_R54    4
## 35     4     borde    NA           8    11      basal     230113_14_R55    2
## 36     4     borde    NA           8    11      basal     230113_14_R56    4
## 37     4              NA           8    12     apical     230113_15_R57    5
## 38     4              NA           8    12     apical     230113_15_R58    3
## 39     4              NA           8    12      basal     230113_15_R59    1
## 40     4              NA           8    12      basal     230113_15_R60    3
## 41     5              NA           8     4     apical     230113_16_R61    4
## 42     5              NA           8     4     apical     230113_16_R62    2
## 43     5              NA           8     4      basal     230113_16_R63    4
## 44     5              NA           8     4      basal     230113_16_R64    1
##    top_tag lipid_cluster injection_sample_id injection_order leaf_tissue_c
## 1      R01             1        230113_1_R01               3          -1.5
## 2      R02             1        230113_1_R02              19          -0.5
## 3      R03             1        230113_1_R03              29           0.5
## 6      R06             2        230113_2_R06              52          -0.5
## 7      R07             1        230113_2_R07              14           0.5
## 8      R08             1        230113_2_R08              21           1.5
## 9      R09             1        230113_3_R09              10          -1.5
## 11     R11             1        230113_3_R11              22           0.5
## 12     R12             2        230113_3_R12              34           1.5
## 13     R13             2        230113_4_R13              30          -1.5
## 14     R15             1        230113_4_R15              27           0.5
## 16     R17             1        230113_5_R17               6          -1.5
## 17     R18             2        230113_5_R18              41          -0.5
## 18     R19             1        230113_5_R19              15           0.5
## 19     R20             2        230113_5_R20              53           1.5
## 20     R21             2        230113_6_R21              31          -1.5
## 21     R23             1        230113_6_R23              13           0.5
## 22     R24             2        230113_6_R24              37           1.5
## 23     R25             2        230113_7_R25              43          -1.5
## 24     R26             1        230113_7_R26              26          -0.5
## 25     R27             1        230113_7_R27               5           0.5
## 26     R29             2        230113_8_R29              44          -1.5
## 27     R31             1        230113_8_R31              18           0.5
## 28     R32             1        230113_8_R32              25           1.5
## 29     R49             1       230113_13_R49               8          -1.5
## 31     R51             2       230113_13_R51              38           0.5
## 32     R52             1       230113_13_R52              20           1.5
## 34     R54             2       230113_14_R54              42          -0.5
## 35     R55             1       230113_14_R55              17           0.5
## 36     R56             2       230113_14_R56              45           1.5
## 37     R57             2       230113_15_R57              51          -1.5
## 38     R58             2       230113_15_R58              28          -0.5
## 39     R59             1       230113_15_R59               7           0.5
## 40     R60             2       230113_15_R60              33           1.5
## 41     R61             2       230113_16_R61              46          -1.5
## 42     R62             1       230113_16_R62              16          -0.5
## 43     R63             2       230113_16_R63              40           0.5
## 44     R64             1       230113_16_R64               4           1.5
# CRIionAL: Update Treatment levels to match pheno
# The pheno object has Treatment renamed to "+P"/"-P"
sampleInfo_matched$Treatment <- factor(
  pheno$Treatment[match(sampleInfo_matched$tube, pheno$tube)],
  levels = c("+P", "-P")
)

  # Convert to "counts-like" scale (multiply by 1e9 for numerical stability)
  scaled_fractions <- ion_fraction[,sampleInfo_matched$tube] * 1e9

  # Create DGEList without normalization
  y_molar <- DGEList(counts = scaled_fractions,  samples = sampleInfo_matched)
  y_molar$samples$norm.factors <- 1

# Design matrix: spatial covariates + biological factors + interaction

# Column correction:

d <- sampleInfo_matched
# Define block: column nested within treatment
block <- interaction(d$Treatment, d$Plot_Column)


design <- model.matrix(
   ~  Plot_Row + injection_order + leaf_tissue_c*Treatment + Genotype*Treatment + leaf_tissue_c*Genotype,
  d
)


cat("\nDesign matrix dimensions:", dim(design), "\n")
## 
## Design matrix dimensions: 38 9
cat("\nCoefficients in model:\n")
## 
## Coefficients in model:
print(colnames(design))
## [1] "(Intercept)"                "Plot_Row"                  
## [3] "injection_order"            "leaf_tissue_c"             
## [5] "Treatment-P"                "GenotypeINV4"              
## [7] "leaf_tissue_c:Treatment-P"  "Treatment-P:GenotypeINV4"  
## [9] "leaf_tissue_c:GenotypeINV4"
cat("\nDesign matrix summary:\n")
## 
## Design matrix summary:
print(head(design))
##   (Intercept) Plot_Row injection_order leaf_tissue_c Treatment-P GenotypeINV4
## 1           1        3               3          -1.5           1            0
## 2           1        3              19          -0.5           1            0
## 3           1        3              29           0.5           1            0
## 6           1        4              52          -0.5           1            1
## 7           1        4              14           0.5           1            1
## 8           1        4              21           1.5           1            1
##   leaf_tissue_c:Treatment-P Treatment-P:GenotypeINV4 leaf_tissue_c:GenotypeINV4
## 1                      -1.5                        0                        0.0
## 2                      -0.5                        0                        0.0
## 3                       0.5                        0                        0.0
## 6                      -0.5                        1                       -0.5
## 7                       0.5                        1                        0.5
## 8                       1.5                        1                        1.5
# Calculate normalization factors
# y <- calcNormFactors(y_molar)

cat("\nNormalization factors calculated\n")
## 
## Normalization factors calculated
cat("Norm factors summary:\n")
## Norm factors summary:
# print(summary(y$samples$norm.factors))

Voom Transformation and Linear Modeling

# Apply voom transformation
# Voom transformation with precision weights
voomR <- voom(y_molar, design = design, plot = FALSE)


# --- 4. Estimate consensus correlation for blocks ---
corfit <- duplicateCorrelation(voomR, design, block = block)

# --- 5. Fit linear model accounting for block correlation ---
fit <- lmFit(voomR, design, block = block, correlation = corfit$consensus.correlation)

# --- 6. Apply empirical Bayes moderation ---
ebfit <- eBayes(fit)

cat("\nModel fitted successfully\n")
## 
## Model fitted successfully
{
  cat("Model fitted:", nrow(fit$coefficients), "genes ×", 
      ncol(fit$coefficients), "coefficients\n")
  cat("\nSignificant genes per coefficient (FDR < 0.05):\n")
  print(colSums(abs(decideTests(ebfit))))
}
## Model fitted: 135 genes × 9 coefficients
## 
## Significant genes per coefficient (FDR < 0.05):
##                (Intercept)                   Plot_Row 
##                        126                          0 
##            injection_order              leaf_tissue_c 
##                         51                          3 
##                Treatment-P               GenotypeINV4 
##                         38                          0 
##  leaf_tissue_c:Treatment-P   Treatment-P:GenotypeINV4 
##                          4                          0 
## leaf_tissue_c:GenotypeINV4 
##                          0

Extract Results for Each Predictor

#' Extract differential abundance results for specified predictors
#'
#' @param ebfit An eBayes fitted model object from limma
#' @param predictor_map Named vector mapping coefficient names to display names
#'
#' @return A data frame with DE results for all specified predictors
extract_predictor_effects <- function(ebfit, predictor_map) {
  # Get coefficient names from the model
  coef_names <- colnames(coef(ebfit))
  
  # Match predictor names to coefficient positions
  coef_indices <- match(names(predictor_map), coef_names)
  
  # Validate all predictors found
  if (any(is.na(coef_indices))) {
    missing <- names(predictor_map)[is.na(coef_indices)]
    stop("Coefficients not found: ", paste(missing, collapse = ", "),
         "\nAvailable: ", paste(coef_names, collapse = ", "))
  }
  
  # Extract results for each predictor
  result_list <- lapply(seq_along(coef_indices), function(i) {
    idx <- coef_indices[i]
    
    tt <- topTable(ebfit, coef = idx, sort.by = "none", n = Inf)
    
    # Calculate 95% confidence intervals
    crit_value <- qt(0.975, ebfit$df.residual + ebfit$df.prior)
    std_errors <- ebfit$stdev.unscaled[, idx] * sqrt(ebfit$s2.post)
    
    tt$upper <- tt$logFC + crit_value * std_errors
    tt$lower <- tt$logFC - crit_value * std_errors
    tt$predictor <- predictor_map[i]
    tt$response <- rownames(tt)
    tt$std_err <- std_errors
    tt
  })
  
  # Combine and format
  results<- do.call(rbind, result_list)
  rownames(effects) <- NULL
  
  results %>%
    dplyr::select(predictor, response, everything()) %>%
    arrange(adj.P.Val)
}
# Map coefficients (use names from coef(), not topTable())
predictor_map <- c(
  "leaf_tissue_c" = "Leaf",
  "Treatment-P" = "-P",
  "leaf_tissue_c:Treatment-P" = "Leaf:-P"
)

# Verify coefficient names match
cat("Available coefficients:\n")
## Available coefficients:
print(colnames(coef(ebfit)))
## [1] "(Intercept)"                "Plot_Row"                  
## [3] "injection_order"            "leaf_tissue_c"             
## [5] "Treatment-P"                "GenotypeINV4"              
## [7] "leaf_tissue_c:Treatment-P"  "Treatment-P:GenotypeINV4"  
## [9] "leaf_tissue_c:GenotypeINV4"
effects_df <- extract_predictor_effects(ebfit, predictor_map)

# Summary
cat("\nTotal tests:", nrow(effects_df), "\n")
## 
## Total tests: 405
cat("Tests per predictor:\n")
## Tests per predictor:
print(table(effects_df$predictor))
## 
##      -P    Leaf Leaf:-P 
##     135     135     135
cat("\nSignificant per predictor (FDR < 0.05):\n")
## 
## Significant per predictor (FDR < 0.05):
print(table(effects_df$predictor[effects_df$adj.P.Val < 0.05]))
## 
##      -P    Leaf Leaf:-P 
##      38       3       4

Process Effects and Define Significance

#' Classify differential lipids with predictor-specific thresholds
#'
#' Applies two-tier classification:
#' - Tier 1: Significant DLs (FDR < 0.05)
#' - Tier 2: High-confidence DLs (significant + large effect size)
#'
#' Effect size thresholds:
#' - Leaf predictors: |logFC| > 0.5 (equivalent to |3β| > 1.5 across range)
#' - Categorical predictors: |logFC| > 1.5
classify_differential_lipids <- function(effects_df) {
  # Define predictor types
  leaf_predictors <- c("Leaf", "Leaf:-P")
  
  effects_df %>%
    mutate(
      # Significance
      is_DL = adj.P.Val < 0.05,
      # High-confidence based on predictor type
      is_hiconf_DL = case_when(
        predictor %in% leaf_predictors &  is_DL & abs(logFC) > 0.5 ~ TRUE,
        predictor =="-P" &  is_DL & abs(logFC) > 1.5 ~ TRUE,
        .default = FALSE
      ),
      
      # Direction of effect
      regulation = case_when(
        !is_hiconf_DL ~ "Not significant",
        logFC > 0 ~ "Upregulated",
        logFC < 0 ~ "Downregulated",
        TRUE ~ "Not significant"
      ),
      
      # Negative log P-value for plotting
      neglogP = -log10(adj.P.Val)
    ) %>%
  mutate(label = if_else(is_DL, response, ""))
  
}
# Define predictor order
effect_order <- c("Leaf", "-P", "Leaf:-P")

# Apply classification
effects_df <- effects_df %>%
  mutate(predictor = factor(predictor, levels = effect_order)) %>%
  classify_differential_lipids()

# Format labels for plotting
effects_df$label <- sub("_", "", effects_df$label, perl = TRUE)
effects_df$label <- sub("_", ":", effects_df$label, perl = TRUE)

# Summary statistics
cat("Classification summary:\n")
## Classification summary:
cat("Total tests:", nrow(effects_df), "\n\n")
## Total tests: 405
cat("Tier 1 - Significant DLs (FDR < 0.05):\n")
## Tier 1 - Significant DLs (FDR < 0.05):
print(table(effects_df$predictor, effects_df$is_DL))
##          
##           FALSE TRUE
##   Leaf      132    3
##   -P         97   38
##   Leaf:-P   131    4
cat("\nTier 2 - High-confidence DLs:\n")
## 
## Tier 2 - High-confidence DLs:
print(table(effects_df$predictor, effects_df$is_hiconf_DL))
##          
##           FALSE TRUE
##   Leaf      132    3
##   -P        112   23
##   Leaf:-P   131    4
cat("\nDirection of significant effects:\n")
## 
## Direction of significant effects:
print(with(effects_df %>% filter(is_DL), 
           table(predictor, regulation)))
##          regulation
## predictor Downregulated Not significant Upregulated
##   Leaf                0               0           3
##   -P                 12              15          11
##   Leaf:-P             1               0           3

Volcano Plots

Custom Legend Key Function

# Custom key glyph for legend
draw_key_custom <- function(data, params, size) {
  colour <- data$colour
  data <- data[data$colour == colour, ]
  grid::segmentsGrob(
    0, 1 / 2, 1, 1 / 2,
    gp = grid::gpar(
      col = data$colour,
      lwd = data$linewidth * 2
    )
  )
}
#' Create volcano plot with staged label placement
#'
#' Plots downregulated labels to the left (xlim < -1.5) and upregulated
#' labels to the right (xlim > 1.5). The -P predictor uses unrestricted
#' labeling.
#'
#' @return A ggplot object with faceted volcano plots

plot_data <- effects_df %>%
  droplevels() %>%
  inner_join(sp, by = c(response = "colname"))
effects_df$is
## NULL
label_data <- plot_data %>%
  filter(is_hiconf_DL)

write.csv(
  label_data,
  file = file.path(paths$intermediate, "high_confidence_DLs.csv"),
  row.names = FALSE
)

force <- 1
label_size <-6

lipid_volcano_3 <- plot_data %>%
  ggplot(aes(
    x = logFC,
    y = -log10(adj.P.Val),
    color = head_group,
    fill = regulation,
    label = label
  )) +
  ylab(expression(-log[10](italic(FDR)))) +
  xlab(expression(log[2]("Fold Change"))) +
  geom_point(alpha = 0.7, shape = 21, color = "white") +
  scale_fill_manual(
    name = "",
    values = c("#00AFBB", "grey", "#bb0c00"),
    labels = c("Downregulated", "Not significant", "Upregulated")
  ) +
  # Stage 1: Downregulated labels (left side)
  geom_text_repel(
    data = label_data  %>% 
      filter(predictor == "Leaf", regulation == "Downregulated", !is.na(label), label != ""),
    force = force,
    fontface = "bold",
    size=label_size,
    segment.color="grey",
    min.segment.length = 0,
    xlim = c(NA, -0.5),
    max.overlaps = 20,
    key_glyph = draw_key_custom
  ) +
  # Stage 2: Upregulated labels (right side)
  geom_text_repel(
    data = label_data  %>% 
      filter(predictor == "Leaf", regulation == "Upregulated", !is.na(label), label != ""),
    force = force,
    fontface = "bold",
    size=label_size,
    segment.color="grey",
    min.segment.length = 0,
    xlim = c(0.5, NA),
    max.overlaps = 20,
    key_glyph = draw_key_custom
  ) +
   # Stage 1: Downregulated labels (left side)
  geom_text_repel(
    data = label_data  %>% 
      filter(predictor == "-P", regulation == "Downregulated", !is.na(label), label != ""),
    force = force,
    fontface = "bold",
    size=label_size,
    direction = "y",
    segment.color="grey",
    min.segment.length = 0,
    xlim = c(NA, -4),
    max.overlaps = 20,
    key_glyph = draw_key_custom
  ) +
  # Stage 2: Upregulated labels (right side)
  geom_text_repel(
    data = label_data  %>% 
      filter(predictor == "-P", regulation == "Upregulated", !is.na(label), label != ""),
        force = force,
    fontface = "bold",
    size=label_size,
    direction = "y",
    segment.color="grey",
    min.segment.length = 0,
    xlim = c(4, NA),
    max.overlaps = 20,
    key_glyph = draw_key_custom
  ) +
  # Separate: -P predictor without xlim restrictions
  geom_text_repel(
    data = label_data  %>% 
      filter(predictor == "Leaf:-P", !is.na(label), label != ""),
        force = force,
    fontface = "bold",
    size=label_size,
    segment.color="grey",
    min.segment.length = 0,
    max.overlaps = 20,
    key_glyph = draw_key_custom
  ) +
  scale_color_manual(
    name = "",
    values = c("darkblue", "orange2", "darkred"),
    labels = c("Glycolipid", "Neutral lipid", "Phospholipid")
  ) +
  facet_wrap(. ~ predictor, scales = "free_x", ncol = 3) +
  guides(
    color = guide_legend(override.aes = list(linewidth = 1.5)),
    fill = "none"
  ) +
  scale_y_continuous(expand = expansion(mult=c(0,0.1)))+
  scale_x_continuous(expand = expansion(mult=c(0.3,0.05)))+
  theme_classic2(base_size = 30) +
  theme(
    plot.title = element_blank(),
    # strip.text = element_blank(),
    strip.text = element_markdown( size=35),
    legend.position = c(0.85, 0.95),
    legend.background = element_rect(fill = "transparent"),
    #axis.title = element_text(size=25),
    axis.title.x = element_blank(),
    #legend.position = "top",
    legend.spacing = unit(0, "line"),
    legend.box.spacing = unit(0, "line"),
    legend.text = element_text(size = 25),
    axis.title.y = element_text(margin = margin(r = -5)),
    plot.margin = margin(5.5, 5.5, 0, 5.5, "pt"),
    strip.background = element_blank()
  )

# Save growth curve plots
saveRDS(lipid_volcano_3, file.path(paths$intermediate, "lipid_volcano_3.rds"))

print(lipid_volcano_3)

## Volcano Plot: All Three Predictors (Excluding Interaction)

Volcano Plot: Leaf and -P Only

This focused plot excludes Leaf x -P results to better visualize differential lipids in response to leaf tissue position and phosphorus deficiency.

#' Create volcano plot with staged label placement
#'
#' Plots downregulated labels to the left (xlim < -1.5) and upregulated
#' labels to the right (xlim > 1.5). The -P predictor uses unrestricted
#' labeling.
#'
#' @return A ggplot object with faceted volcano plots

plot_data <- effects_df %>%
  filter(predictor!="Leaf:-P") %>%
  droplevels() %>%
  inner_join(sp, by = c(response = "colname"))

force <- 1
dot_size <- 5

volcano_2 <- plot_data %>%
  ggplot(aes(
    x = logFC,
    y = -log10(adj.P.Val),
    color = head_group,
    fill = regulation,
    label = label
  )) +
  ylab(expression(-log[10](italic(FDR)))) +
  xlab(expression(log[2]("Fold Change"))) +
  geom_point(alpha = 0.7, shape = 21, color = "white") +
  scale_fill_manual(
    name = "",
    values = c("#00AFBB", "grey", "#bb0c00"),
    labels = c("Downregulated", "Not significant", "Upregulated")
  ) +
  # Stage 1: Downregulated labels (left side)
  geom_text_repel(
    data = plot_data %>% 
      filter(predictor == "Leaf", regulation == "Downregulated", !is.na(label), label != ""),
    force = force,
    fontface = "bold",
    size=label_size,
    segment.color="grey",
    min.segment.length = 0,
    xlim = c(NA, -0.5),
    max.overlaps = 20,
    key_glyph = draw_key_custom
  ) +
  # Stage 2: Upregulated labels (right side)
  geom_text_repel(
    data = plot_data %>% 
      filter(predictor == "Leaf", regulation == "Upregulated", !is.na(label), label != ""),
    force = force,
    fontface = "bold",
    size=label_size,
    segment.color="grey",
    min.segment.length = 0,
    xlim = c(0.5, NA),
    max.overlaps = 20,
    key_glyph = draw_key_custom
  ) +
   # Stage 1: Downregulated labels (left side)
  geom_text_repel(
    data = plot_data %>% 
      filter(predictor == "-P", regulation == "Downregulated", !is.na(label), label != ""),
    force = force,
    fontface = "bold",
    size=label_size,
    direction = "y",
    segment.color="grey",
    min.segment.length = 0,
    xlim = c(NA, -4),
    max.overlaps = 20,
    key_glyph = draw_key_custom
  ) +
  # Stage 2: Upregulated labels (right side)
  geom_text_repel(
    data = plot_data %>% 
      filter(predictor == "-P", regulation == "Upregulated", !is.na(label), label != ""),
    force = 2,
    fontface = "bold",
    size=label_size,
    direction = "y",
    segment.color="grey",
    min.segment.length = 0,
    xlim = c(4, NA),
    max.overlaps = 20,
    key_glyph = draw_key_custom
  ) +
  scale_color_manual(
    name = "",
    values = c("darkblue", "orange2", "darkred"),
    labels = c("Glycolipid", "Neutral lipid", "Phospholipid")
  ) +
  facet_wrap(. ~ predictor, scales = "free_x", ncol = 3) +
  guides(
    color = guide_legend(override.aes = list(linewidth = 1.5),reverse = TRUE),
    fill = "none"
  ) +
  scale_y_continuous(expand = expansion(mult=c(0,0.3)))+
  scale_x_continuous(expand = expansion(mult=c(0.3,0.02)))+
  theme_classic2(base_size = 30) +
  theme(
    plot.title = element_blank(),
    # strip.text = element_blank(),
    strip.text = element_markdown( size=35),
    legend.position = c(0.12, 0.85),
    legend.background = element_rect(fill = "transparent"),
    axis.title = element_text(size=25),
    #legend.position = "top",
    legend.spacing = unit(0, "line"),
    legend.box.spacing = unit(0, "line"),
    legend.text = element_text(size = 20),
    #legend.direction = "horizontal",
    axis.title.y = element_text(margin = margin(r = -5)),
    strip.background = element_blank()
  )

print(volcano_2)

ggsave(
  filename = file.path(paths$figures, "volcano_2_leafxP.png"),
  plot = volcano_2,
  height = 3,
  width = 5,
  units = "in",
  dpi = 150
)
#' Create volcano plot for Leaf:-P interaction
#'
#' Plots labels without xlim restrictions for the Leaf:-P interaction effect.
#'
#' @return A ggplot object for Leaf:-P volcano plot
plot_data_leafP <- effects_df %>%
  filter(predictor == "Leaf:-P") %>%
  droplevels() %>%
  inner_join(sp, by = c(response = "colname"))

force <- 1

volcano_leafP <- plot_data_leafP %>%
  ggplot(aes(
    x = logFC,
    y = -log10(adj.P.Val),
    color = head_group,
    fill = regulation,
    label = label
  )) +
  ylab(expression(-log[10](italic(FDR)))) +
  xlab(expression(log[2]("Fold Change"))) +
  ylim(0, 5) +
  geom_point(alpha = 0.7, shape = 21, color = "white") +
  scale_fill_manual(
    name = "",
    values = c("#00AFBB", "grey", "#bb0c00"),
    labels = c("Downregulated", "Not significant", "Upregulated")
  ) +
  # Downregulated labels (no xlim restriction)
  geom_text_repel(
    data = plot_data_leafP %>% 
      filter(regulation == "Downregulated", !is.na(label), label != ""),
    force = force,
    fontface = "bold",
    size = 7,
    segment.color = "grey",
    min.segment.length = 0,
    max.overlaps = 20,
    key_glyph = draw_key_custom
  ) +
  # Upregulated labels (no xlim restriction)
  geom_text_repel(
    data = plot_data_leafP %>% 
      filter(regulation == "Upregulated", !is.na(label), label != ""),
    force = force,
    fontface = "bold",
    size = 7,
    segment.color = "grey",
    min.segment.length = 0,
    max.overlaps = 20,
    key_glyph = draw_key_custom
  ) +
  scale_color_manual(
    name = "",
    values = c("orange2", "darkred", "darkblue" ),
    labels = c("Glycolipid", "Neutral lipid", "Phospholipid")
  ) +
  guides(
    color = guide_legend(override.aes = list(linewidth = 1.5), reverse = TRUE),
    fill = "none"
  ) +
  scale_y_continuous(expand = expansion(mult = c(0, 0.3))) +
  theme_classic2(base_size = 30) +
  theme(
    plot.title = element_blank(),
    # strip.text = element_blank(),
    strip.text = element_markdown( size=35),
    legend.position = "none",
    legend.background = element_rect(fill = "transparent"),
    axis.title = element_text(size = 25),
    legend.spacing = unit(0, "line"),
    legend.box.spacing = unit(0, "line"),
    legend.text = element_text(size = 20),
    axis.title.y = element_text(margin = margin(r = -5)),
    strip.background = element_blank()
  )

print(volcano_leafP)

ggsave(
  filename = file.path(paths$figures, "volcano_leafP.png"),
  plot = volcano_leafP,
  height = 7,
  width = 7,
  units = "in",
  dpi = 150
)

Effect Size Plots

Prepare Data for Effect Plots

# Filter significant effects and prepare for plotting
to_plot <- effects_df %>%
  mutate(response = gsub("_glyco", "", response)) %>%
  mutate(predictor = factor(
    predictor,
    levels = c("Leaf", "-P", "Leaf:-P","Inv4m", "-P:Inv4m")
  )) %>%
  filter(is_hiconf_DL) %>%
  inner_join(sp, by = c(response = "colname")) %>%
  mutate(sign = sign(logFC)) %>%
  ungroup() %>%
  group_by(response) %>%
  mutate(max_effect = max(abs(logFC))) %>%
  ungroup() %>%
  group_by(predictor, head_group, class) %>%
  mutate(head_group = fct_reorder(head_group, adj.P.Val)) %>%
  ungroup() %>%
  group_by(predictor) %>%
  arrange(predictor, head_group, class, adj.P.Val) %>%
  mutate(.r = row_number()) %>%
  ungroup()

cat("\nSignificant effects per predictor:\n")
## 
## Significant effects per predictor:
print(with(to_plot, table(predictor, head_group)))
##           head_group
## predictor  phospholipid glycolipid neutral
##   Leaf                3          0       0
##   -P                 10          6       7
##   Leaf:-P             1          0       3
##   Inv4m               0          0       0
##   -P:Inv4m            0          0       0

Effect Size Plot: All Predictors

pd <- position_dodge(0.4)

effect_plot_all <- to_plot %>%
  droplevels() %>%
  mutate(response = fct_reorder(response, .r)) %>%
  ungroup() %>%
  ggplot(aes(
    x = logFC,
    y = response,
    col = head_group
  )) +
  xlab("Effect (log2 Fold Change)") +
  geom_vline(xintercept = 0, lty = 2) +
  geom_point(position = pd, size = 3) +
  geom_errorbar(
    aes(xmin = upper, xmax = lower),
    position = pd,
    width = 0.2,
    linewidth = 0.7
  ) +
  guides(
    shape = guide_legend(title = NULL),
    color = guide_legend(reverse = TRUE)
  ) +
  facet_wrap(. ~ predictor, scales = "free", ncol = 4) +
  scale_y_discrete(limits = rev) +
  scale_color_manual(values = c("blue", "red", "chartreuse")) +
  theme_light(base_size = 15) +
  theme(
    legend.position = "top",
    strip.background = element_rect(fill = "white"),
    strip.text = element_text(color = "black", size = 15),
    axis.title.y = element_blank(),
    panel.grid.minor = element_blank(),
    panel.grid.major.y = element_blank()
  )

print(effect_plot_all)

Export Results

# Export differential abundance results
write.csv(
  voomR$E,
  file = file.path(paths$intermediate, "voom_normalised_lipids.csv"),
  row.names = TRUE
)

# Export differential abundance results
write.csv(
  effects_df,
  file = file.path(paths$intermediate, "lipid_differential_abundance_results.csv"),
  row.names = FALSE
)

# Save volcano plots (alternative export - disabled)
# ggsave(
#   volcano_3,
#   file = file.path(paths$figures, "lipid_volcano_all.svg"),
#   height = 6,
#   width = 12
# )

ggsave(
  volcano_leafP,
  file = file.path(paths$figures, "lipid_volcano_leaf_P.svg"),
  height = 6,
  width = 10
)

ggsave(
  effect_plot_all,
  file = file.path(paths$figures, "lipid_effects_all.svg"),
  height = 10,
  width = 12
)

Summary Statistics

# Count differential lipids by class and regulation
summary_table <- effects_df %>%
  filter(is_DL) %>%
  inner_join(sp, by = c(response = "colname")) %>%
  group_by(predictor, head_group, regulation) %>%
  summarise(n_lipids = n(), .groups = "drop") %>%
  arrange(predictor, head_group, regulation)

print(summary_table)
## # A tibble: 10 × 4
##    predictor head_group   regulation      n_lipids
##    <fct>     <chr>        <chr>              <int>
##  1 Leaf      phospholipid Upregulated            3
##  2 -P        glycolipid   Not significant        8
##  3 -P        glycolipid   Upregulated            6
##  4 -P        neutral      Downregulated          2
##  5 -P        neutral      Not significant        1
##  6 -P        neutral      Upregulated            5
##  7 -P        phospholipid Downregulated         10
##  8 -P        phospholipid Not significant        6
##  9 Leaf:-P   neutral      Upregulated            3
## 10 Leaf:-P   phospholipid Downregulated          1
# Significant lipids per predictor
sig_summary <- effects_df %>%
  group_by(predictor) %>%
  summarise(
    total_tested = n(),
    n_significant = sum(is_DL),
    n_upregulated = sum(is_DL & logFC>0),
    n_downregulated = sum(is_DL & logFC<0),
    pct_significant = round(100 * n_significant / total_tested, 1)
  )

print(sig_summary)
## # A tibble: 3 × 6
##   predictor total_tested n_significant n_upregulated n_downregulated
##   <fct>            <int>         <int>         <int>           <int>
## 1 Leaf               135             3             3               0
## 2 -P                 135            38            20              18
## 3 Leaf:-P            135             4             3               1
## # ℹ 1 more variable: pct_significant <dbl>

Export Latex Tables

library(dplyr)

# Prepare lipid data (high-confidence only)
lipid_table_data <- to_plot %>%
  filter(is_hiconf_DL) %>%
  mutate(
    logFC = round(logFC, digits = 2),
    std_err = round(std_err, digits = 2),
    neglogP = round(-log10(adj.P.Val), digits = 2),
    predictor_display = case_when(
      predictor == "-P"        ~ "Phosphorus Deficiency (-P)",
      predictor == "Leaf"      ~ "Leaf Tissue Position (Leaf)",
      predictor == "Leaf:-P"   ~ "Leaf × Phosphorus Interaction (Leaf:-P)",
      TRUE                     ~ as.character(predictor)
    )
  ) %>%
  arrange(predictor, desc(regulation == "Upregulated"), desc(neglogP))


# Function to generate LaTeX
create_lipid_table <- function(df) {
  lines <- c()
  
  lines <- c(lines, "\\begin{table}[h!]")
  lines <- c(lines, "\\centering")
  lines <- c(lines, "\\footnotesize")
  lines <- c(lines, "\\caption{High-confidence differentially abundant lipids across experimental factors, annotated with IUB nomenclature and lipid class.}")
  lines <- c(lines, "\\label{table::differential_lipids}")
  lines <- c(lines, "\\begin{tabular}{|c|c|c|c|}")  # 4 columns now
  lines <- c(lines, "\\hline")
  lines <- c(lines, "\\textbf{Lipid (IUB)} & \\textbf{Class} & \\textbf{$-\\log_{10}(\\textit{FDR})$} & \\textbf{$\\log_2(\\text{FC})$}\\\\")
  lines <- c(lines, "\\hline")
  
  for (pred in unique(df$predictor)) {
    pred_data <- df %>% filter(predictor == pred)
    pred_name <- pred_data$predictor_display[1]
    
    # Predictor section
    lines <- c(lines, sprintf("\\multicolumn{4}{|l|}{\\textit{\\textbf{%s}}} \\\\", pred_name))
    lines <- c(lines, "\\hline")
    
    # Upregulated
    up_data <- pred_data %>% filter(regulation == "Upregulated")
    if (nrow(up_data) > 0) {
      lines <- c(lines, "\\multicolumn{4}{|l|}{\\textit{\\textbf{Upregulated Lipids}}} \\\\")
      lines <- c(lines, "\\hline")
      for (i in 1:nrow(up_data)) {
        row <- up_data[i, ]
        lines <- c(lines, sprintf(
          "%s & %s & %.1f & %.2f\\\\",
          row$label, row$head_group, row$neglogP, row$logFC,row$std_err
        ))
      }
    }
    
    # Downregulated
    down_data <- pred_data %>% filter(regulation == "Downregulated")
    if (nrow(down_data) > 0) {
      lines <- c(lines, "\\multicolumn{4}{|l|}{\\textit{\\textbf{Downregulated Lipids}}} \\\\")
      lines <- c(lines, "\\hline")
      for (i in 1:nrow(down_data)) {
        row <- down_data[i, ]
        lines <- c(lines, sprintf(
          "%s & %s & %.1f & %.2f\\\\",
          row$label, row$head_group, row$neglogP, row$logFC,row$std_err
        ))
      }
    }
    
    lines <- c(lines, "\\hline")
  }
  
  lines <- c(lines, "\\end{tabular}")
  lines <- c(lines, "\\end{table}")
  
  return(paste(lines, collapse = "\n"))
}

# Generate and save
latex_table <- create_lipid_table(lipid_table_data)
writeLines(latex_table, con = file.path(paths$tables, "differential_lipids_table.tex"))

cat("✅ Final lipid table (4 columns, no Notes) saved to:", file.path(paths$tables, "differential_lipids_table.tex"), "\n")
## ✅ Final lipid table (4 columns, no Notes) saved to: /Users/fvrodriguez/Library/CloudStorage/GoogleDrive-frodrig4@ncsu.edu/My Drive/repos/inv4m/results/phosphorus_paper/tables/differential_lipids_table.tex

Session Information

sessionInfo()
## R version 4.5.1 (2025-06-13)
## Platform: x86_64-apple-darwin20
## Running under: macOS Sequoia 15.6.1
## 
## Matrix products: default
## BLAS:   /Library/Frameworks/R.framework/Versions/4.5-x86_64/Resources/lib/libRblas.0.dylib 
## LAPACK: /Library/Frameworks/R.framework/Versions/4.5-x86_64/Resources/lib/libRlapack.dylib;  LAPACK version 3.12.1
## 
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
## 
## time zone: America/New_York
## tzcode source: internal
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## other attached packages:
##  [1] ggtext_0.1.2  ggpubr_0.6.2  cowplot_1.2.0 ggrepel_0.9.6 forcats_1.0.1
##  [6] tidyr_1.3.1   dplyr_1.1.4   ggplot2_4.0.1 edgeR_4.8.0   limma_3.66.0 
## 
## loaded via a namespace (and not attached):
##  [1] gtable_0.3.6       xfun_0.54          bslib_0.9.0        rstatix_0.7.3     
##  [5] lattice_0.22-7     vctrs_0.6.5        tools_4.5.1        generics_0.1.4    
##  [9] tibble_3.3.0       pkgconfig_2.0.3    RColorBrewer_1.1-3 S7_0.2.1          
## [13] lifecycle_1.0.4    stringr_1.6.0      compiler_4.5.1     farver_2.1.2      
## [17] textshaping_1.0.4  statmod_1.5.1      carData_3.0-5      litedown_0.8      
## [21] htmltools_0.5.9    sass_0.4.10        yaml_2.3.12        Formula_1.2-5     
## [25] pillar_1.11.1      car_3.1-3          jquerylib_0.1.4    cachem_1.1.0      
## [29] abind_1.4-8        commonmark_2.0.0   tidyselect_1.2.1   locfit_1.5-9.12   
## [33] digest_0.6.39      stringi_1.8.7      purrr_1.2.0        labeling_0.4.3    
## [37] rprojroot_2.1.1    fastmap_1.2.0      grid_4.5.1         here_1.0.2        
## [41] colorspace_2.1-2   cli_3.6.5          magrittr_2.0.4     utf8_1.2.6        
## [45] broom_1.0.11       withr_3.0.2        scales_1.4.0       backports_1.5.0   
## [49] rmarkdown_2.30     ggsignif_0.6.4     ragg_1.5.0         evaluate_1.0.5    
## [53] knitr_1.50         viridisLite_0.4.2  markdown_2.0       rlang_1.1.6       
## [57] gridtext_0.1.5     Rcpp_1.1.0         glue_1.8.0         xml2_1.5.1        
## [61] svglite_2.2.2      jsonlite_2.0.0     R6_2.6.1           systemfonts_1.3.1